# Cleveland dot plot on renewable energy Faceted by regions
# on 2017, 2018 and diverging stacked barchart on growth
library(tidyverse)
library(readxl)
renewables_generation <- read_excel("data/bp-stats-review-2019-all-data.xlsx", sheet="Renewables Generation by source")
# Add continents
continents <- c(rep("north_america", 3), rep("central_america", 8), rep("europe", 20), rep("CIS", 6), rep("middle_east", 8), rep("africa", 4), rep("asia_pacific", 17))
# Data processing
renewables_generation_df <- renewables_generation[4:86,] %>%
rename(country = `Renewable Energy: Generation by source*`, wind_2017 = `...2`, solar_2017 = `...3`, wind_2018 = `...6`, solar_2018 = `...7`, wind_growth = `...10`, solar_growth = `...11`) %>%
filter(!str_detect(country, "Other")) %>%
filter(!str_detect(country, "Total")) %>%
add_column(continent = continents) %>%
mutate_at(c('wind_2017', 'solar_2017', 'wind_2018', 'solar_2018', 'wind_growth', 'solar_growth'), funs(as.numeric)) %>%
gather('wind_2018', 'solar_2018', key = "energy_type", value = "generation")
# create a theme for dot plots, which can be reused
theme_dotplot <- theme_bw(14) +
theme(axis.text.y = element_text(size = rel(.75)),
axis.ticks.y = element_blank(),
axis.title.x = element_text(size = rel(.75)),
panel.grid.major.x = element_blank(),
panel.grid.major.y = element_line(size = 0.5),
panel.grid.minor.x = element_blank())
# Create labels
right_label <- renewables_generation_df %>%
group_by(country) %>%
arrange(desc(generation)) %>%
top_n(1)
left_label <- renewables_generation_df %>%
group_by(country) %>%
arrange(desc(generation)) %>%
slice(2)
# Plot cleveland
ggplot(renewables_generation_df, aes(x = generation, y = reorder(country, generation))) +
geom_line(aes(group = country)) +
geom_point(aes(color = energy_type)) +
facet_grid(continent ~ ., scales = "free", space = "free") +
geom_text(data = right_label, aes(color = energy_type, label = round(generation, 0)), size = 3, hjust = -.5) +
geom_text(data = left_label, aes(color = energy_type, label = round(generation, 0)), size = 3, hjust = 1.5) +
labs(title = "Renewable energy generation sources in 2018", subtitle="Source: BP Stats Review") +
ylab("") +
xlab("Total energy generation") +
xlim(0, 400) +
theme_dotplot

# Time series of CO_2
carbon_dioxide_emissions <- read_excel("data/bp-stats-review-2019-all-data.xlsx", col_names = FALSE, sheet="Carbon Dioxide Emissions")
# Set the column names
colnames(carbon_dioxide_emissions) <- carbon_dioxide_emissions[3,]
# Data Processing
carbon_dioxide_emissions_df <- carbon_dioxide_emissions[5:107, 1:55] %>%
rename(country = `Million tonnes of carbon dioxide`) %>%
drop_na() %>%
filter(!str_detect(country, "Other")) %>%
filter(!str_detect(country, "Total")) %>%
filter(country == "China" | country == "India" | country == "Japan" | country == "Germany" | country == "United Kingdom" | country == "US")
# Get China, India, US, Germany, UK, Japan
# Tranpose
carbon_dioxide_emissions_df <- carbon_dioxide_emissions_df %>%
rownames_to_column %>%
gather(variable, value, -rowname) %>%
spread(rowname, value)
# Rename
colnames(carbon_dioxide_emissions_df) <- tail(carbon_dioxide_emissions_df, 1)
# Delete last row, mutate values, gather
carbon_dioxide_emissions_df <-carbon_dioxide_emissions_df[-55,] %>%
rename(time = country) %>%
mutate_at(c('time', 'US', 'Germany', 'United Kingdom', 'China', 'India', 'Japan'), funs(as.numeric)) %>%
gather('US', 'Germany', 'United Kingdom', 'China', 'India', 'Japan', key = "country", value = "CO2")
# Actually an interesting article
# https://www.carbonbrief.org/guest-post-chinas-co2-emissions-grew-slower-than-expected-in-2018
# Plot TS faceted
# carbon_dioxide_emissions_df %>%
# ggplot(aes(time, CO2, color=country)) +
# facet_grid(country ~ ., scales = "free") +
# geom_line() +
# theme(legend.position = "none")
# Plot TS of top 6 countries
carbon_dioxide_emissions_df %>%
ggplot(aes(time, CO2, color=country)) +
geom_line()

# DO SOMETHING LIKE THIS https://www.carbonbrief.org/mapped-worlds-coal-power-plants
# ANd the slider thingy like this: https://www.visualcapitalist.com/all-the-worlds-coal-power-plants-in-one-map/
# Findings
# 1) CO2 produced mainly by coal factories
# 2) Seems like Renewables and Nuclear actually help reduce C02 by relying less on coal
# Plot TS of CHINA but with when they joined the World Trade Organization
# Super relevant article: https://www.carbonbrief.org/guest-post-chinas-co2-emissions-grew-slower-than-expected-in-2018
carbon_dioxide_emissions_df %>%
filter(country == "China") %>%
ggplot(aes(time, CO2, color=country)) +
geom_line() +
annotate("rect", xmin=2000.5, xmax=2001.5, ymin=-Inf, ymax=Inf, alpha=0.2, fill="blue") +
annotate("text", x = 2001, y = 8000, label = "China joins WTO") +
annotate("rect", xmin=2013, xmax=2016, ymin=-Inf, ymax=Inf, alpha=0.2, fill="blue") +
annotate("text", x = 2013, y =5000, label = "In 2014, \n China expands \n renewable energy/ \n nuclear power; \n causing CO2 to fall \n \n In 2017, \n coal consumption \n grew again ")

# Parallel Coordinate Plots on Primary Energy consumption by fuel
library(GGally)
primary_energy_consumption <- read_excel("data/bp-stats-review-2019-all-data.xlsx", col_names = FALSE, sheet="Primary Energy - Cons by fuel")
# Add continents
continents <- c(rep("north_america", 3), rep("central_america", 8), rep("europe", 20), rep("CIS", 6), rep("middle_east", 8), rep("africa", 4), rep("asia_pacific", 17))
# Data processing
primary_energy_consumption_df <- primary_energy_consumption[5:87,] %>%
rename(country = `...1` , oil = `...9`, natural_gas = `...10`, coal = `...11`, nuclear_energy = `...12`, hydro_electric = `...13`, renewables = `...14`) %>%
filter(!str_detect(country, "Other")) %>%
filter(!str_detect(country, "Total")) %>%
add_column(continent = continents) %>%
mutate_at(c('oil', 'natural_gas', 'coal', 'nuclear_energy', 'hydro_electric', 'renewables'), funs(as.numeric)) %>%
select(country, oil, natural_gas, coal, nuclear_energy, hydro_electric, renewables, continent)
# GGally::ggparcoord(primary_energy_consumption_df, columns=9:14, scale="uniminmax", title="PCP")
# Create an interactive parallel coordinate plot.
# Cannot fct reorder countries
parcoords::parcoords(primary_energy_consumption_df,
rownames = FALSE,
alpha = 0.5,
brushMode = "1D-axes-multi",
color = list(colorScale = "scaleOrdinal",
colorBy = "continent",
colorScheme = "schemeCategory10"),
height=600,
width=800,
withD3 = TRUE)
# Missing data analysis on Carbon Emissions
process_space_time_data <- function(data, x_start, x_end, y_start, y_end) {
# Super useful function to convert Space-Time data into "almost" tidy data
# "almost" tidy because countries can still apply gather by key-value
# But this is easier for D3
# Replace columns names
colnames(data) <- data[2,]
# Filter countries only
data_df <- data[y_start:y_end,x_start:x_end] %>%
rename(country = colnames(data)[1]) %>%
drop_na() %>%
filter(!str_detect(country, "Other")) %>%
filter(!str_detect(country, "Total"))
# Transpose
data_df <- data_df %>%
rownames_to_column %>%
gather(variable, value, -rowname) %>%
spread(rowname, value)
# Rename
colnames(data_df) <- tail(data_df, 1)
# Delete last row, rename
data_df <- data_df[-x_end,] %>%
mutate_at(vars(-country), funs(as.numeric)) %>%
rename(year = `country`)
return(data_df)
}
# LOL
# The USSR dissolved in 1991
# Missing data for carbon
carbon <- read_excel("data/bp-stats-review-2019-all-data.xlsx", sheet="Carbon Dioxide Emissions")
carbon_missing_df <- process_space_time_data(carbon, x_start=1, x_end=55, y_start=4, y_end=107)
carbon_missing_df <- carbon_missing_df %>%
gather(-year, key = "country", value = "carbon_emissions") %>%
mutate(Missing = ifelse(is.na(carbon_emissions), "yes", "no"))
ggplot(carbon_missing_df, aes(x = year, y = fct_reorder(country, -carbon_emissions, sum), fill = Missing)) +
geom_tile(color = "white") +
ggtitle("Carbon") +
scale_fill_viridis_d() + # discrete scale
theme(axis.text.x=element_text(angle=90, hjust=1))

# Data Processing for Hydroelectric
hydro_generation <- read_excel("data/bp-stats-review-2019-all-data.xlsx", sheet="Hydro Generation - TWh")
hydro_generation_df <- process_space_time_data(hydro_generation, x_start=1, x_end=55, y_start=4, y_end=106)
hydro_generation_df <- hydro_generation_df %>%
gather(-year, key = "country", value = "generation") %>%
mutate(Missing = ifelse(is.na(generation) | generation == 0, "yes", "no"))
ggplot(hydro_generation_df, aes(x = year, y = fct_reorder(country, -generation, sum), fill = Missing)) +
geom_tile(color = "white") +
ggtitle("Hydro generation") +
scale_fill_viridis_d() + # discrete scale
theme(axis.text.x=element_text(angle=90, hjust=1))

# Missing data for wind generation
wind_generation <- read_excel("data/bp-stats-review-2019-all-data.xlsx", sheet="Wind Generation - TWh ")
wind_generation_df <- process_space_time_data(wind_generation, x_start=1, x_end=55, y_start=4, y_end=108)
wind_generation_df <- wind_generation_df %>%
gather(-year, key = "country", value = "generation") %>%
mutate(Missing = ifelse(is.na(generation), "yes", "no"))
ggplot(wind_generation_df, aes(x = year, y = fct_reorder(country, -generation, sum), fill = Missing)) +
geom_tile(color = "white") +
ggtitle("Wind generation") +
scale_fill_viridis_d() + # discrete scale
theme(axis.text.x=element_text(angle=90, hjust=1))

# Missing data for solar generation
solar_generation <- read_excel("data/bp-stats-review-2019-all-data.xlsx", sheet="Solar Generation - TWh")
solar_generation_df <- process_space_time_data(solar_generation, x_start=1, x_end=55, y_start=4, y_end=108)
solar_generation_df <- solar_generation_df %>%
gather(-year, key = "country", value = "generation") %>%
mutate(Missing = ifelse(is.na(generation), "yes", "no"))
ggplot(solar_generation_df, aes(x = year, y = fct_reorder(country, -generation, sum), fill = Missing)) +
geom_tile(color = "white") +
ggtitle("Solar generation") +
scale_fill_viridis_d() + # discrete scale
theme(axis.text.x=element_text(angle=90, hjust=1))

# DEPRECATED
# Likert style climate change survey draft
# SOURCE: https://climatecommunication.yale.edu/visualizations-data/ycom-us/
# library(HH) # diverging stacked bar chart
# ycom <- read_csv("data/YCOM_2019_Data.csv")
# ycom_metadata <- read_csv("data/YCOM_2019_Metadata.csv")
#
# # BELIEFS
# happening <- ycom[1,] %>%
# dplyr::select('happeningOppose', 'happening') %>%
# rename(disagree = happeningOppose, agree = happening) %>%
# add_column(question = c('Is climate change even for realz?'), .before = TRUE)
#
# human <- ycom[1,] %>%
# dplyr::select( 'humanOppose', 'human') %>%
# rename(disagree = humanOppose, agree = human) %>%
# add_column(question = c('is it all cuz of humans'), .before = TRUE)
#
# consensus <- ycom[1,] %>%
# dplyr::select('consensusOppose', 'consensus') %>%
# rename(disagree = consensusOppose, agree = consensus) %>%
# add_column(question = c('do scientists agree?'), .before = TRUE)
#
# affectweather <- ycom[1,] %>%
# dplyr::select('affectweatherOppose', 'affectweather') %>%
# rename(disagree = affectweatherOppose, agree = affectweather) %>%
# add_column(question = c('is New York getting colder cuz of clim@te change'), .before = TRUE)
#
# beliefs <- bind_rows(happening, human, consensus, affectweather)
#
# HH::likert(question~., beliefs,
# positive.order = TRUE,
# main = list("Beliefs"))
# Data Processing for D3 Interactive, Animation part and simple transitions only
oil <- read_excel("data/bp-stats-review-2019-all-data.xlsx", sheet="Elec Gen from Oil")
gas <- read_excel("data/bp-stats-review-2019-all-data.xlsx", sheet="Elec Gen from Gas")
carbon <- read_excel("data/bp-stats-review-2019-all-data.xlsx", sheet="Carbon Dioxide Emissions")
# Process data into space-time
oil_df <- process_space_time_data(oil, x_start=1, x_end=35, y_start=4, y_end=50)
gas_df <- process_space_time_data(gas, x_start=1, x_end=35, y_start=4, y_end=50)
carbon_df <- process_space_time_data(carbon, x_start=1, x_end=55, y_start=4, y_end=107)
# Get intersection of countries
countries <- intersect(intersect(colnames(oil_df), colnames(gas_df)), colnames(carbon_df))
oil_df <- oil_df[countries]
gas_df <- gas_df[countries]
carbon_df <- carbon_df[countries]
carbon_df <- carbon_df %>% filter(year >= 1985) # Get 1985 onwards only
# Concatenate
data <- data.frame(oil_df, gas_df, carbon_df)
# Write
#write.csv(data,'data_many_countries.csv', row.names = TRUE)
# Data Processing for D3 Renewables vs Emissions
carbon <- read_excel("data/bp-stats-review-2019-all-data.xlsx", sheet="Carbon Dioxide Emissions")
carbon_df <- process_space_time_data(carbon, x_start=1, x_end=55, y_start=4, y_end=107)
carbon_df <- tail(carbon_df, 1)
renewable <- read_excel("data/bp-stats-review-2019-all-data.xlsx", sheet="Renewables - TWh")
renewable_df <- process_space_time_data(renewable, x_start=1, x_end=55, y_start=4, y_end=107)
renewable_df <- tail(renewable_df, 1)
carbon_df <- carbon_df %>%
gather(-year, key = "country", value = "emissions") %>%
select(c("country", "emissions"))
renewable_df <- renewable_df %>%
gather(-year, key = "country", value = "renewables") %>%
select(c("country", "renewables"))
temp <- data.frame(carbon_df, renewable_df) %>% select(c('country', emissions, renewables))
write.csv(temp,'temp.csv', row.names = TRUE)
# Data Processing for Emissions
carbon <- read_excel("data/bp-stats-review-2019-all-data.xlsx", sheet="Carbon Dioxide Emissions")
carbon_df <- process_space_time_data(carbon, x_start=1, x_end=55, y_start=4, y_end=107)
# Tidy data
carbon_df <- carbon_df %>%
gather(key = "country", value = "generation", -year)
# Write to CSV
write.csv(carbon_df,'carbon_generation.csv', row.names = TRUE)
# Data Processing for Solar Generation
solar_generation <- read_excel("data/bp-stats-review-2019-all-data.xlsx", sheet="Solar Generation - TWh")
solar_generation_df <- process_space_time_data(solar_generation, x_start=1, x_end=55, y_start=4, y_end=108)
# Tidy data
solar_generation_df <- solar_generation_df %>%
gather(key = "country", value = "generation", -year)
# Write to CSV
write.csv(solar_generation_df,'solar_generation.csv', row.names = TRUE)
# Data Processing for Wind Generation
wind_generation <- read_excel("data/bp-stats-review-2019-all-data.xlsx", sheet="Wind Generation - TWh ")
wind_generation_df <- process_space_time_data(wind_generation, x_start=1, x_end=55, y_start=4, y_end=108)
# Tidy data
wind_generation_df <- wind_generation_df %>%
gather(key = "country", value = "generation", -year)
# Write to CSV
write.csv(wind_generation_df,'wind_generation.csv', row.names = TRUE)
# Data Processing for Biomass
biomass_generation <- read_excel("data/bp-stats-review-2019-all-data.xlsx", sheet="Geo Biomass Other - TWh")
biomass_generation_df <- process_space_time_data(biomass_generation, x_start=1, x_end=55, y_start=4, y_end=108)
# Tidy data
biomass_generation_df <- biomass_generation_df %>%
gather(key = "country", value = "generation", -year)
# Write to CSV
write.csv(biomass_generation_df,'biomass_generation.csv', row.names = TRUE)
# Data Processing for Biofuel
biofuel_generation <- read_excel("data/bp-stats-review-2019-all-data.xlsx", sheet="Biofuels Production - Kboed")
biofuel_generation_df <- process_space_time_data(biofuel_generation, x_start=1, x_end=30, y_start=4, y_end=43)
# Tidy data
biofuel_generation_df <- biofuel_generation_df %>%
gather(key = "country", value = "generation", -year)
# Write to CSV
write.csv(biofuel_generation_df,'biofuel_generation.csv', row.names = TRUE)
# Data Processing for Hydroelectric
hydro_generation <- read_excel("data/bp-stats-review-2019-all-data.xlsx", sheet="Hydro Generation - TWh")
hydro_generation_df <- process_space_time_data(hydro_generation, x_start=1, x_end=55, y_start=4, y_end=106)
# Tidy data
hydro_generation_df <- hydro_generation_df %>%
gather(key = "country", value = "generation", -year)
hydro_generation_df
# Write to CSV
write.csv(hydro_generation_df,'hydro_generation.csv', row.names = TRUE)
# Data Processing for Total Renewables
renewables_generation <- read_excel("data/bp-stats-review-2019-all-data.xlsx", sheet="Renewables - TWh")
renewables_generation_df <- process_space_time_data(renewables_generation, x_start=1, x_end=55, y_start=4, y_end=106)
# Tidy data
renewables_generation_df <- renewables_generation_df %>%
gather(key = "country", value = "generation", -year)
# Add Hydro into renewables
renewables_generation_df['generation'] <- renewables_generation_df['generation'] + hydro_generation_df['generation']
# Write to CSV
write.csv(renewables_generation_df,'renewables_generation.csv', row.names = TRUE)